In [1]:
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import preprocessing

from data_load import get_clean_data,normalize_1_variables,normalize_2_variables,normalize_3_variables,heatmapify
In [2]:
sns.set(style="whitegrid")
sns.set(style="ticks", color_codes=True)
sns.set(font_scale=2)

%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;

# another cell
import matplotlib.pyplot as plt
%matplotlib inline


for i in range(10):
    plt.plot(range(10))
    plt.show()
UsageError: Line magic function `%%javascript` not found.
In [3]:
# Load the quiz responses through the project helper (second argument passed as
# True) and print a quick overview: row count, column names and the first rows.
# NOTE(review): the message below implies the helper keeps only satisfied
# respondents when the flag is True - confirm against data_load.get_clean_data.
data = get_clean_data('quiz_data.csv', True)
response_count = len(data)
print("there are {} responses from people satisfied with their program".format(response_count))
print(data.columns)
print(data.head())
there are 584 responses from people satisfied with their program
Index(['program', 'happy', 'problem_type', 'creative', 'industry', 'outdoors',
       'career', 'group_work', 'liked_courses', 'disliked_courses',
       'programming', 'join_clubs', 'not_clubs', 'liked_projects',
       'disliked_projects', 'tv_shows', 'alternate_degree',
       'expensive_equipment', 'drawing', 'essay', 'architecture', 'automotive',
       'business', 'construction', 'health', 'environment', 'manufacturing',
       'technology'],
      dtype='object')
   program happy problem_type           creative  \
id                                                 
0     mech   Yes      defined  somewhat_creative   
1     bmed   Yes  investigate  somewhat_creative   
2      swe   Yes      defined  somewhat_creative   
5     tron   Yes  investigate  somewhat_creative   
6     cive   Yes      defined           creative   

                                             industry outdoors       career  \
id                                                                            
0   Automotive (i.e. Designing a new autonomous ca...  limited     building   
1   Health (i.e. Creating technology for minimally...  indoors      sensors   
2   Technology (i.e. Working with cloud based soft...  limited  programming   
5   Automotive (i.e. Designing a new autonomous ca...  indoors      sensors   
6           Construction (i.e. Building a smart city)  limited   optimizing   

      group_work     liked_courses disliked_courses    ...      drawing essay  \
id                                                     ...                      
0   occasionally  computer_science          history    ...      partial   yes   
1            yes           biology      visual_arts    ...          bad   yes   
2   occasionally              math          history    ...      partial   yes   
5   occasionally              math        geography    ...      partial    no   
6   occasionally           physics        chemistry    ...      partial    no   

   architecture automotive business construction health environment  \
id                                                                    
0             0          1        0            1      0           0   
1             0          0        0            0      1           0   
2             0          0        0            0      0           0   
5             0          1        0            0      1           0   
6             0          0        0            1      0           0   

   manufacturing technology  
id                           
0              1          1  
1              0          0  
2              0          1  
5              0          1  
6              0          0  

[5 rows x 28 columns]

Summary of the data

In [10]:
# Bar charts for the `program` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: program")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('program')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['program'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['program'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: program
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x1233bc978>
In [5]:
# Bar charts for the `happy` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: happy")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('happy')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['happy'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['happy'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: happy
Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x123342f60>
In [11]:
# Bar charts for the `problem_type` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: problem_type")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('problem_type')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['problem_type'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['problem_type'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: problem_type
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x10fe11cf8>
In [12]:
# Bar charts for the `creative` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: creative")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('creative')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['creative'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['creative'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: creative
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x120ffef60>
In [4]:
# Bar charts for the `architecture` column (labelled 'industry - architecture'):
# raw counts on the left, percentage share on the right.
print("Summary of the variable: industry - architecture")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('industry - architecture')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['architecture'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['architecture'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: industry - architecture
Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x11b57e630>
In [5]:
# Bar charts for the `technology` column (labelled 'industry - technology'):
# raw counts on the left, percentage share on the right.
print("Summary of the variable: industry - technology")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('industry - technology')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['technology'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['technology'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: industry - technology
Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x10932e358>
In [6]:
# Bar charts for the `automotive` column (labelled 'industry - automotive'):
# raw counts on the left, percentage share on the right.
print("Summary of the variable: industry - automotive")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('industry - automotive')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['automotive'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['automotive'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: industry - automotive
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x109fde470>
In [7]:
# Bar charts for the `business` column (labelled 'industry - business'):
# raw counts on the left, percentage share on the right.
print("Summary of the variable: industry - business")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('industry - business')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['business'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['business'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: industry - business
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x10a0976d8>
In [8]:
# Bar charts for the `construction` column (labelled 'industry - construction'):
# raw counts on the left, percentage share on the right.
print("Summary of the variable: industry - construction")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('industry - construction')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['construction'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['construction'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: industry - construction
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x11d6c9358>
In [9]:
# Bar charts for the `health` column (labelled 'industry - health'):
# raw counts on the left, percentage share on the right.
print("Summary of the variable: industry - health")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('industry - health')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['health'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['health'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: industry - health
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x11dad21d0>
In [10]:
# Bar charts for the `environment` column (labelled 'industry - environment'):
# raw counts on the left, percentage share on the right.
print("Summary of the variable: industry - environment")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('industry - environment')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['environment'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['environment'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: industry - environment
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x11db7c278>
In [11]:
# Bar charts for the `manufacturing` column (labelled 'industry - manufacturing'):
# raw counts on the left, percentage share on the right.
print("Summary of the variable: industry - manufacturing")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('industry - manufacturing')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['manufacturing'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['manufacturing'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: industry - manufacturing
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x11ddcf940>
In [14]:
# Bar charts for the `outdoors` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: outdoors")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('outdoors')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['outdoors'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['outdoors'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: outdoors
Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x1210ad1d0>
In [15]:
# Bar charts for the `career` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: career")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('career')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['career'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['career'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: career
Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x1235d6b70>
In [16]:
# Bar charts for the `group_work` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: group_work")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('group_work')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['group_work'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['group_work'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: group_work
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x1236b1908>
In [17]:
# Bar charts for the `liked_courses` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: liked_courses")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('liked_courses')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['liked_courses'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['liked_courses'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: liked_courses
Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x123750e48>
In [18]:
# Bar charts for the `disliked_courses` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: disliked_courses")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('disliked_courses')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['disliked_courses'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['disliked_courses'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: disliked_courses
Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x1245ec6a0>
In [19]:
# Bar charts for the `programming` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: programming")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('programming')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['programming'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['programming'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: programming
Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x12365ee48>
In [20]:
# Bar charts for the `join_clubs` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: join_clubs")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('join_clubs')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['join_clubs'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['join_clubs'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: join_clubs
Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x124be6b70>
In [21]:
# Bar charts for the `not_clubs` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: not_clubs")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('not_clubs')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['not_clubs'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['not_clubs'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: not_clubs
Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x124ebf550>
In [22]:
# Bar charts for the `liked_projects` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: liked_projects")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('liked_projects')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['liked_projects'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['liked_projects'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: liked_projects
Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x1254082e8>
In [23]:
# Bar charts for the `disliked_projects` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: disliked_projects")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('disliked_projects')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['disliked_projects'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['disliked_projects'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: disliked_projects
Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x1254c33c8>
In [24]:
# Bar charts for the `tv_shows` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: tv_shows")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('tv_shows')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['tv_shows'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['tv_shows'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: tv_shows
Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x125a733c8>
In [25]:
# Bar charts for the `alternate_degree` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: alternate_degree")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('alternate_degree')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['alternate_degree'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['alternate_degree'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: alternate_degree
Out[25]:
<matplotlib.axes._subplots.AxesSubplot at 0x125b2d5f8>
In [26]:
# Bar charts for the `expensive_equipment` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: expensive_equipment")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('expensive_equipment')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['expensive_equipment'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['expensive_equipment'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: expensive_equipment
Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x1260f94a8>
In [27]:
# Bar charts for the `drawing` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: drawing")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('drawing')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['drawing'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['drawing'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: drawing
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x126199b38>
In [28]:
# Bar charts for the `essay` column: raw counts on the left, percentage share on the right.
print("Summary of the variable: essay")
fig, (freq_ax, pct_ax) = plt.subplots(1,2)
fig.suptitle('essay')

bar_opts = dict(kind='bar', figsize=(14,8))
counts = data['essay'].value_counts()
counts.plot(title="Frequency", ax=freq_ax, **bar_opts)

share = data['essay'].value_counts(normalize=True) * 100
# Kept as the last expression so the cell still echoes the returned Axes.
share.plot(title="Percent", ax=pct_ax, **bar_opts)
Summary of the variable: essay
Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x126424320>

Heat map of all attributes

In [29]:
# Heat map for `program` against `happy`, built with the data_load helpers.
normalized = normalize_1_variables(data,"program","happy")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "happy", list(data["happy"].unique()),
)
column_labels = list(data["happy"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. happy')
Out[29]:
Text(0.5, 1.0, 'program Vs. happy')
In [30]:
# Heat map for `program` against `problem_type`, built with the data_load helpers.
normalized = normalize_1_variables(data,"program","problem_type")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "problem_type", list(data["problem_type"].unique()),
)
column_labels = list(data["problem_type"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. problem_type')
Out[30]:
Text(0.5, 1.0, 'program Vs. problem_type')
In [31]:
# Heat map for `program` against `creative`, built with the data_load helpers.
normalized = normalize_1_variables(data,"program","creative")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "creative", list(data["creative"].unique()),
)
column_labels = list(data["creative"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. creative')
Out[31]:
Text(0.5, 1.0, 'program Vs. creative')
In [32]:
# Heat map for `program` against `outdoors`, built with the data_load helpers.
normalized = normalize_1_variables(data,"program","outdoors")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "outdoors", list(data["outdoors"].unique()),
)
column_labels = list(data["outdoors"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. outdoors')
Out[32]:
Text(0.5, 1.0, 'program Vs. outdoors')
In [33]:
# Heat map for `program` against `career`, built with the data_load helpers.
normalized = normalize_1_variables(data,"program","career")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "career", list(data["career"].unique()),
)
column_labels = list(data["career"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. career')
Out[33]:
Text(0.5, 1.0, 'program Vs. career')
In [34]:
# Heat map for `program` against `group_work`, built with the data_load helpers.
normalized = normalize_1_variables(data,"program","group_work")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "group_work", list(data["group_work"].unique()),
)
column_labels = list(data["group_work"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. group_work')
Out[34]:
Text(0.5, 1.0, 'program Vs. group_work')
In [35]:
# Heat map for `program` against `liked_courses`, built with the data_load helpers.
normalized = normalize_1_variables(data,"program","liked_courses")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "liked_courses", list(data["liked_courses"].unique()),
)
column_labels = list(data["liked_courses"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. liked_courses')
Out[35]:
Text(0.5, 1.0, 'program Vs. liked_courses')
In [36]:
# Heat map for `program` against `disliked_courses`, built with the data_load
# helpers and drawn on a 12x12 figure.
normalized = normalize_1_variables(data,"program","disliked_courses")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "disliked_courses", list(data["disliked_courses"].unique()),
)
column_labels = list(data["disliked_courses"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(12,12))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. disliked_courses')
Out[36]:
Text(0.5, 1.0, 'program Vs. disliked_courses')
In [37]:
# Heat map for `program` against `programming`, built with the data_load helpers.
normalized = normalize_1_variables(data,"program","programming")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "programming", list(data["programming"].unique()),
)
column_labels = list(data["programming"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. programming')
Out[37]:
Text(0.5, 1.0, 'program Vs. programming')
In [38]:
# Heat map for `program` against `join_clubs`, built with the data_load helpers.
normalized = normalize_1_variables(data,"program","join_clubs")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "join_clubs", list(data["join_clubs"].unique()),
)
column_labels = list(data["join_clubs"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. join_clubs')
Out[38]:
Text(0.5, 1.0, 'program Vs. join_clubs')
In [39]:
# Heat map for `program` against `not_clubs`, built with the data_load helpers.
normalized = normalize_1_variables(data,"program","not_clubs")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "not_clubs", list(data["not_clubs"].unique()),
)
column_labels = list(data["not_clubs"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. not_clubs')
Out[39]:
Text(0.5, 1.0, 'program Vs. not_clubs')
In [40]:
# Heat map for `program` against `liked_projects`, built with the data_load helpers.
normalized = normalize_1_variables(data,"program","liked_projects")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "liked_projects", list(data["liked_projects"].unique()),
)
column_labels = list(data["liked_projects"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. liked_projects')
Out[40]:
Text(0.5, 1.0, 'program Vs. liked_projects')
In [41]:
# Heat map for `program` against `disliked_projects`, built with the data_load helpers.
normalized = normalize_1_variables(data,"program","disliked_projects")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "disliked_projects", list(data["disliked_projects"].unique()),
)
column_labels = list(data["disliked_projects"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. disliked_projects')
Out[41]:
Text(0.5, 1.0, 'program Vs. disliked_projects')
In [42]:
# Heat map for `program` against `tv_shows`, built with the data_load helpers.
# The title uses the 'program Vs. <name>' spelling (with a space after the
# period) shared by the other heat-map cells in this notebook.
plot_data = normalize_1_variables(data,"program","tv_shows")
plot_data = heatmapify(plot_data,"program",list(data["program"].unique()),"tv_shows",list(data["tv_shows"].unique()))
df = pd.DataFrame(plot_data, columns=list(data["tv_shows"].unique()))
fig, ax = plt.subplots(figsize=(10,10))
sns.heatmap(df,annot=True,cmap="Blues").set_title('program Vs. tv_shows')
Out[42]:
Text(0.5, 1.0, 'program Vs.tv_shows')
In [43]:
# Heat map for `program` against `alternate_degree`, built with the data_load
# helpers and drawn on a 15x15 figure.
normalized = normalize_1_variables(data,"program","alternate_degree")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "alternate_degree", list(data["alternate_degree"].unique()),
)
column_labels = list(data["alternate_degree"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(15,15))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. alternate_degree')
Out[43]:
Text(0.5, 1.0, 'program Vs. alternate_degree')
In [44]:
# Heat map for `program` against `drawing`, built with the data_load helpers.
normalized = normalize_1_variables(data,"program","drawing")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "drawing", list(data["drawing"].unique()),
)
column_labels = list(data["drawing"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. drawing')
Out[44]:
Text(0.5, 1.0, 'program Vs. drawing')
In [45]:
# Heat map for `program` against `essay`, built with the data_load helpers.
normalized = normalize_1_variables(data,"program","essay")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "essay", list(data["essay"].unique()),
)
column_labels = list(data["essay"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. essay')
Out[45]:
Text(0.5, 1.0, 'program Vs. essay')
In [13]:
# Heat map for `program` against the `architecture` column (titled as an
# industry), built with the data_load helpers.
normalized = normalize_1_variables(data,"program","architecture")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "architecture", list(data["architecture"].unique()),
)
column_labels = list(data["architecture"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. industry - architecture')
Out[13]:
Text(0.5, 1.0, 'program Vs. industry - architecture')
In [15]:
# Heat map for `program` against the `automotive` column (titled as an
# industry), built with the data_load helpers.
normalized = normalize_1_variables(data,"program","automotive")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "automotive", list(data["automotive"].unique()),
)
column_labels = list(data["automotive"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. industry - automotive')
Out[15]:
Text(0.5, 1.0, 'program Vs. industry - automotive')
In [16]:
# Heat map for `program` against the `business` column (titled as an
# industry), built with the data_load helpers.
normalized = normalize_1_variables(data,"program","business")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "business", list(data["business"].unique()),
)
column_labels = list(data["business"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. industry - business')
Out[16]:
Text(0.5, 1.0, 'program Vs. industry - business')
In [17]:
# Heat map for `program` against the `construction` column (titled as an
# industry), built with the data_load helpers.
normalized = normalize_1_variables(data,"program","construction")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "construction", list(data["construction"].unique()),
)
column_labels = list(data["construction"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. industry - construction')
Out[17]:
Text(0.5, 1.0, 'program Vs. industry - construction')
In [18]:
# Heat map for `program` against the `health` column (titled as an
# industry), built with the data_load helpers.
normalized = normalize_1_variables(data,"program","health")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "health", list(data["health"].unique()),
)
column_labels = list(data["health"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. industry - health')
Out[18]:
Text(0.5, 1.0, 'program Vs. industry - health')
In [19]:
# Heat map for `program` against the `environment` column (titled as an
# industry), built with the data_load helpers.
normalized = normalize_1_variables(data,"program","environment")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "environment", list(data["environment"].unique()),
)
column_labels = list(data["environment"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. industry - environment')
Out[19]:
Text(0.5, 1.0, 'program Vs. industry - environment')
In [20]:
# Heat map for `program` against the `manufacturing` column (titled as an
# industry), built with the data_load helpers.
normalized = normalize_1_variables(data,"program","manufacturing")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "manufacturing", list(data["manufacturing"].unique()),
)
column_labels = list(data["manufacturing"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. industry - manufacturing')
Out[20]:
Text(0.5, 1.0, 'program Vs. industry - manufacturing')
In [21]:
# Heat map for `program` against the `technology` column (titled as an
# industry), built with the data_load helpers.
normalized = normalize_1_variables(data,"program","technology")
plot_data = heatmapify(
    normalized,
    "program", list(data["program"].unique()),
    "technology", list(data["technology"].unique()),
)
column_labels = list(data["technology"].unique())
df = pd.DataFrame(plot_data, columns=column_labels)

fig, ax = plt.subplots(figsize=(10,10))
heat_ax = sns.heatmap(df, annot=True, cmap="Blues")
# Kept as the last expression so the cell still echoes the title Text object.
heat_ax.set_title('program Vs. industry - technology')
Out[21]:
Text(0.5, 1.0, 'program Vs. industry - technology')

Summary of data by program

In [22]:
# Summary for a single program: for every survey column listed below, draw one
# row of bar charts -- raw answer counts on the left, percentages on the right.
program_name = 'mech'

# Columns to summarise, in display order (one subplot row each).
summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career', 'group_work',
    'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
    'alternate_degree', 'expensive_equipment', 'essay', 'architecture',
    'automotive', 'business', 'construction', 'health', 'environment',
    'manufacturing', 'technology',
]

data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: " + program_name)

# Filter into a separate name so `data` keeps holding the full frame; this
# removes the need to re-read the CSV at the end of the cell.
mech_data = data[data.program == program_name]

# The figure size is set once here instead of on every individual plot call.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2, figsize=(14, 8))
fig.suptitle(program_name)

for row_index, column_name in enumerate(summary_columns):
    answers = mech_data[column_name]
    # Left panel: absolute number of responses per answer.
    answers.value_counts().plot(kind='bar',
                                title=column_name + "Frequency",
                                ax=axes[row_index][0])
    # Right panel: the same distribution expressed in percent.
    (answers.value_counts(normalize=True) * 100).plot(kind='bar',
                                                      title=column_name + "Percent",
                                                      ax=axes[row_index][1])

# `top` is a fraction of the figure height, so a value of 10 deliberately
# stretches the subplot grid far beyond the nominal figure so the 25 rows
# stay readable.
top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top, hspace=hspace)
Summary of the Program: mech
In [23]:
# Summary for a single program: for every survey column listed below, draw one
# row of bar charts -- raw answer counts on the left, percentages on the right.
program_name = 'bmed'

# Columns to summarise, in display order (one subplot row each).
summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career', 'group_work',
    'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
    'alternate_degree', 'expensive_equipment', 'essay', 'architecture',
    'automotive', 'business', 'construction', 'health', 'environment',
    'manufacturing', 'technology',
]

data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: " + program_name)

# Filter into a separate name so `data` keeps holding the full frame; this
# removes the need to re-read the CSV at the end of the cell.
bmed_data = data[data.program == program_name]

# The figure size is set once here instead of on every individual plot call.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2, figsize=(14, 8))
fig.suptitle(program_name)

for row_index, column_name in enumerate(summary_columns):
    answers = bmed_data[column_name]
    # Left panel: absolute number of responses per answer.
    answers.value_counts().plot(kind='bar',
                                title=column_name + "Frequency",
                                ax=axes[row_index][0])
    # Right panel: the same distribution expressed in percent.
    (answers.value_counts(normalize=True) * 100).plot(kind='bar',
                                                      title=column_name + "Percent",
                                                      ax=axes[row_index][1])

# `top` is a fraction of the figure height, so a value of 10 deliberately
# stretches the subplot grid far beyond the nominal figure so the 25 rows
# stay readable.
top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top, hspace=hspace)
Summary of the Program: bmed
In [24]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: swe")
data =  data[data.program=='swe']
fig, axes = plt.subplots(nrows=25, ncols=2)
fig.suptitle('sft')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])

data['architecture'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="architectureFrequency",
                                    ax=axes[17][0])
(data['architecture'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="architecturePercent",
                                    ax=axes[17][1])
                                    
data['automotive'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="automotiveFrequency",
                                    ax=axes[18][0])
(data['automotive'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="automotivePercent",
                                    ax=axes[18][1])
                                    
data['business'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="businessFrequency",
                                    ax=axes[19][0])
(data['business'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="businessPercent",
                                    ax=axes[19][1])
                                    
data['construction'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="constructionFrequency",
                                    ax=axes[20][0])
(data['construction'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="constructionPercent",
                                    ax=axes[20][1])

data['health'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="healthFrequency",
                                    ax=axes[21][0])
(data['health'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="healthPercent",
                                    ax=axes[21][1])
                                    
data['environment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="environmentFrequency",
                                    ax=axes[22][0])
(data['environment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="environmentPercent",
                                    ax=axes[22][1])
                                    
data['manufacturing'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="manufacturingFrequency",
                                    ax=axes[23][0])
(data['manufacturing'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="manufacturingPercent",
                                    ax=axes[23][1])
                                    
data['technology'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="technologyFrequency",
                                    ax=axes[24][0])
(data['technology'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="technologyPercent",
                                    ax=axes[24][1])
top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: swe
In [25]:
# Per-program summary for 'ce': for every quiz column listed below, draw a bar
# chart of the answer counts (left column of the grid) and of the answer
# percentages (right column of the grid) for respondents in this program.
program_name = 'ce'
data = get_clean_data('quiz_data.csv',True)
print("Summary of the Program: " + program_name)

# Filter into a separate frame so `data` keeps the full data set for later
# cells; this removes the need to re-read the CSV at the end of the cell.
program_data = data[data.program == program_name]

# One grid row per column, plotted in this order.
summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career',
    'group_work', 'liked_courses', 'disliked_courses', 'programming',
    'join_clubs', 'not_clubs', 'liked_projects', 'disliked_projects',
    'tv_shows', 'alternate_degree', 'expensive_equipment', 'essay',
    'architecture', 'automotive', 'business', 'construction', 'health',
    'environment', 'manufacturing', 'technology',
]

# The figure size is set once here rather than re-applied on every plot call.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2, figsize=(14,8))
fig.suptitle(program_name)

for row, column in enumerate(summary_columns):
    # Left: absolute number of respondents per answer.
    program_data[column].value_counts().plot(kind='bar',
                                             title=column + "Frequency",
                                             ax=axes[row][0])
    # Right: the same distribution expressed as a percentage of respondents.
    (program_data[column].value_counts(normalize=True) * 100).plot(kind='bar',
                                             title=column + "Percent",
                                             ax=axes[row][1])

# `top` is given in figure-height fractions, so a value of 10 stretches the
# subplot grid well beyond the nominal figure height; presumably this is what
# keeps 25 rows readable in the inline output.
top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
Summary of the Program: ce
In [26]:
# Per-program summary for 'tron': for every quiz column listed below, draw a
# bar chart of the answer counts (left column of the grid) and of the answer
# percentages (right column of the grid) for respondents in this program.
program_name = 'tron'
data = get_clean_data('quiz_data.csv',True)
print("Summary of the Program: " + program_name)

# Filter into a separate frame so `data` keeps the full data set for later
# cells; this removes the need to re-read the CSV at the end of the cell.
program_data = data[data.program == program_name]

# One grid row per column, plotted in this order.
summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career',
    'group_work', 'liked_courses', 'disliked_courses', 'programming',
    'join_clubs', 'not_clubs', 'liked_projects', 'disliked_projects',
    'tv_shows', 'alternate_degree', 'expensive_equipment', 'essay',
    'architecture', 'automotive', 'business', 'construction', 'health',
    'environment', 'manufacturing', 'technology',
]

# The figure size is set once here rather than re-applied on every plot call.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2, figsize=(14,8))
fig.suptitle(program_name)

for row, column in enumerate(summary_columns):
    # Left: absolute number of respondents per answer.
    program_data[column].value_counts().plot(kind='bar',
                                             title=column + "Frequency",
                                             ax=axes[row][0])
    # Right: the same distribution expressed as a percentage of respondents.
    (program_data[column].value_counts(normalize=True) * 100).plot(kind='bar',
                                             title=column + "Percent",
                                             ax=axes[row][1])

# `top` is given in figure-height fractions, so a value of 10 stretches the
# subplot grid well beyond the nominal figure height; presumably this is what
# keeps 25 rows readable in the inline output.
top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
Summary of the Program: tron
In [27]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: cive")
data =  data[data.program=='cive']
fig, axes = plt.subplots(nrows=25, ncols=2)
fig.suptitle('cive')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])
data['architecture'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="architectureFrequency",
                                    ax=axes[17][0])
(data['architecture'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="architecturePercent",
                                    ax=axes[17][1])
                                    
data['automotive'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="automotiveFrequency",
                                    ax=axes[18][0])
(data['automotive'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="automotivePercent",
                                    ax=axes[18][1])
                                    
data['business'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="businessFrequency",
                                    ax=axes[19][0])
(data['business'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="businessPercent",
                                    ax=axes[19][1])
                                    
data['construction'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="constructionFrequency",
                                    ax=axes[20][0])
(data['construction'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="constructionPercent",
                                    ax=axes[20][1])

data['health'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="healthFrequency",
                                    ax=axes[21][0])
(data['health'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="healthPercent",
                                    ax=axes[21][1])
                                    
data['environment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="environmentFrequency",
                                    ax=axes[22][0])
(data['environment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="environmentPercent",
                                    ax=axes[22][1])
                                    
data['manufacturing'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="manufacturingFrequency",
                                    ax=axes[23][0])
(data['manufacturing'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="manufacturingPercent",
                                    ax=axes[23][1])
                                    
data['technology'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="technologyFrequency",
                                    ax=axes[24][0])
(data['technology'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="technologyPercent",
                                    ax=axes[24][1])

top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: cive
In [52]:
# Per-program summary for 'chem': for every survey question below, draw the
# answer counts (left column) and the answer percentages (right column).
data = get_clean_data('quiz_data.csv',True)
print("Summary of the Program: chem")

# Filter into a separate name so `data` keeps holding the full cleaned survey.
# This removes the need to re-read the CSV at the end of the cell just to undo
# an overwrite of `data`.
program_data = data[data.program=='chem']

# Questions to summarise, in display order; each one gets its own subplot row.
summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career',
    'group_work', 'liked_courses', 'disliked_courses', 'programming',
    'join_clubs', 'not_clubs', 'liked_projects', 'disliked_projects',
    'tv_shows', 'alternate_degree', 'expensive_equipment', 'essay',
    'architecture', 'automotive', 'business', 'construction', 'health',
    'environment', 'manufacturing', 'technology',
]

# The figure size is set once here instead of being repeated on every plot call.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2, figsize=(14,8))
fig.suptitle('chem')

for row, column in enumerate(summary_columns):
    answers = program_data[column]
    # Left: raw number of respondents per answer.
    answers.value_counts().plot(kind='bar',
                                title=column + "Frequency",
                                ax=axes[row][0])
    # Right: the same distribution expressed as a percentage of respondents.
    (answers.value_counts(normalize=True) * 100).plot(kind='bar',
                                                      title=column + "Percent",
                                                      ax=axes[row][1])

# `top` is a fraction of the figure height, so 10 stretches the subplot grid to
# ten figure-heights; presumably this is what keeps 25 rows legible in the
# inline output -- confirm before changing.
top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
Summary of the Program: chem
In [28]:
# Per-program summary for 'syde': one subplot row per survey question, with
# answer counts on the left and answer percentages on the right.
data = get_clean_data('quiz_data.csv',True)
print("Summary of the Program: syde")

# Keep the program subset under its own name: `data` stays the full cleaned
# survey, so no second CSV read is needed at the end of the cell to restore it.
syde_responses = data[data.program=='syde']

# Survey questions to chart, listed in the order their rows appear.
questions = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career',
    'group_work', 'liked_courses', 'disliked_courses', 'programming',
    'join_clubs', 'not_clubs', 'liked_projects', 'disliked_projects',
    'tv_shows', 'alternate_degree', 'expensive_equipment', 'essay',
    'architecture', 'automotive', 'business', 'construction', 'health',
    'environment', 'manufacturing', 'technology',
]

# Size the figure once at creation rather than on each individual plot call.
fig, axes = plt.subplots(nrows=len(questions), ncols=2, figsize=(14,8))
fig.suptitle('syde')

for question, (freq_ax, pct_ax) in zip(questions, axes):
    frequency = syde_responses[question].value_counts()
    percent = syde_responses[question].value_counts(normalize=True) * 100
    frequency.plot(kind='bar', title=question + "Frequency", ax=freq_ax)
    percent.plot(kind='bar', title=question + "Percent", ax=pct_ax)

# `top` is expressed as a fraction of the figure height; a value of 10 pushes
# the grid far beyond the nominal 14x8 canvas. Presumably the inline backend
# expands the rendered image to fit -- verify before tuning these values.
top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
Summary of the Program: syde
In [29]:
# Per-program summary: for every surveyed question, draw the answer counts
# (left subplot) and the answer shares in percent (right subplot) for the
# respondents of a single program.
program_code = 'msci'

# Questions to summarise, one subplot row each, in display order.
# NOTE(review): 'drawing' is listed in data.columns but is not part of this
# summary -- confirm whether that omission is intentional.
summary_columns = [
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'essay',
    'architecture',
    'automotive',
    'business',
    'construction',
    'health',
    'environment',
    'manufacturing',
    'technology',
]

# Reload the full data set so the cell does not depend on what earlier cells
# left in `data`.
data = get_clean_data('quiz_data.csv',True)
print("Summary of the Program: " + program_code)

# Keep the filtered rows under their own name: `data` stays the full frame,
# so there is no need to re-read the CSV at the end of the cell to restore it.
program_data = data[data.program == program_code]

# The figure size is set once here instead of being re-applied by every
# individual .plot() call.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2, figsize=(14, 8))
fig.suptitle(program_code)

# Each row of `axes` holds the (frequency, percent) pair for one question.
for column, (frequency_ax, percent_ax) in zip(summary_columns, axes):
    answer_counts = program_data[column].value_counts()
    answer_counts.plot(kind='bar',
                       title=column + "Frequency",
                       ax=frequency_ax)

    answer_percent = program_data[column].value_counts(normalize=True) * 100
    answer_percent.plot(kind='bar',
                        title=column + "Percent",
                        ax=percent_ax)

# `top` is expressed as a fraction of the figure height; a value above 1
# stretches the subplot grid past the nominal 14x8 canvas so the rows get
# enough vertical room.
# NOTE(review): presumably this relies on the notebook backend cropping the
# output to the drawn content -- confirm when changing backends.
top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top, hspace=hspace)
Summary of the Program: msci
In [30]:
# Per-program summary: for every surveyed question, draw the answer counts
# (left subplot) and the answer shares in percent (right subplot) for the
# respondents of a single program.
program_code = 'elec'

# Questions to summarise, one subplot row each, in display order.
# NOTE(review): 'drawing' is listed in data.columns but is not part of this
# summary -- confirm whether that omission is intentional.
summary_columns = [
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'essay',
    'architecture',
    'automotive',
    'business',
    'construction',
    'health',
    'environment',
    'manufacturing',
    'technology',
]

# Reload the full data set so the cell does not depend on what earlier cells
# left in `data`.
data = get_clean_data('quiz_data.csv',True)
print("Summary of the Program: " + program_code)

# Keep the filtered rows under their own name: `data` stays the full frame,
# so there is no need to re-read the CSV at the end of the cell to restore it.
program_data = data[data.program == program_code]

# The figure size is set once here instead of being re-applied by every
# individual .plot() call.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2, figsize=(14, 8))
fig.suptitle(program_code)

# Each row of `axes` holds the (frequency, percent) pair for one question.
for column, (frequency_ax, percent_ax) in zip(summary_columns, axes):
    answer_counts = program_data[column].value_counts()
    answer_counts.plot(kind='bar',
                       title=column + "Frequency",
                       ax=frequency_ax)

    answer_percent = program_data[column].value_counts(normalize=True) * 100
    answer_percent.plot(kind='bar',
                        title=column + "Percent",
                        ax=percent_ax)

# `top` is expressed as a fraction of the figure height; a value above 1
# stretches the subplot grid past the nominal 14x8 canvas so the rows get
# enough vertical room.
# NOTE(review): presumably this relies on the notebook backend cropping the
# output to the drawn content -- confirm when changing backends.
top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top, hspace=hspace)
Summary of the Program: elec
In [31]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: nano")
data =  data[data.program=='nano']
fig, axes = plt.subplots(nrows=25, ncols=2)
fig.suptitle('nano')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])

data['architecture'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="architectureFrequency",
                                    ax=axes[17][0])
(data['architecture'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="architecturePercent",
                                    ax=axes[17][1])
                                    
data['automotive'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="automotiveFrequency",
                                    ax=axes[18][0])
(data['automotive'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="automotivePercent",
                                    ax=axes[18][1])
                                    
data['business'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="businessFrequency",
                                    ax=axes[19][0])
(data['business'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="businessPercent",
                                    ax=axes[19][1])
                                    
data['construction'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="constructionFrequency",
                                    ax=axes[20][0])
(data['construction'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="constructionPercent",
                                    ax=axes[20][1])

data['health'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="healthFrequency",
                                    ax=axes[21][0])
(data['health'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="healthPercent",
                                    ax=axes[21][1])
                                    
data['environment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="environmentFrequency",
                                    ax=axes[22][0])
(data['environment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="environmentPercent",
                                    ax=axes[22][1])
                                    
data['manufacturing'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="manufacturingFrequency",
                                    ax=axes[23][0])
(data['manufacturing'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="manufacturingPercent",
                                    ax=axes[23][1])
                                    
data['technology'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="technologyFrequency",
                                    ax=axes[24][0])
(data['technology'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="technologyPercent",
                                    ax=axes[24][1])
top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: nano
In [32]:
# Per-program summary for 'geo': for each survey column listed below, draw the
# raw answer counts (left subplot) next to the percentage share (right subplot).
data = get_clean_data('quiz_data.csv',True)
program_name = 'geo'
print("Summary of the Program: " + program_name)

# Filter into a separate name instead of rebinding `data`, so `data` still
# holds the full cleaned frame when this cell finishes and the CSV does not
# have to be read a second time to restore it.
program_rows = data[data.program == program_name]

# One subplot row per column, in display order.
# NOTE(review): 'industry' and 'drawing' appear in data.columns but are not
# plotted here — confirm that omission is intentional.
summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career', 'group_work',
    'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
    'alternate_degree', 'expensive_equipment', 'essay', 'architecture',
    'automotive', 'business', 'construction', 'health', 'environment',
    'manufacturing', 'technology',
]

# The figure size is set once here; passing figsize to every pandas plot call
# together with ax= only re-applies the same size to the shared figure.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2, figsize=(14, 8))
fig.suptitle(program_name)

for row_index, column in enumerate(summary_columns):
    answers = program_rows[column]
    answers.value_counts().plot(kind='bar',
                                title=column + "Frequency",
                                ax=axes[row_index][0])
    (answers.value_counts(normalize=True) * 100).plot(kind='bar',
                                                      title=column + "Percent",
                                                      ax=axes[row_index][1])

# NOTE(review): `top` is a fraction of the figure height, so 10 stretches the
# subplot grid far above the 14x8 figure; presumably the notebook backend's
# tight bounding box is what makes the whole grid visible — confirm.
top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
Summary of the Program: geo
In [33]:
# Per-program summary for 'env': for each survey column listed below, draw the
# raw answer counts (left subplot) next to the percentage share (right subplot).
data = get_clean_data('quiz_data.csv',True)
program_name = 'env'
print("Summary of the Program: " + program_name)

# Filter into a separate name instead of rebinding `data`, so `data` still
# holds the full cleaned frame when this cell finishes and the CSV does not
# have to be read a second time to restore it.
program_rows = data[data.program == program_name]

# One subplot row per column, in display order.
# NOTE(review): 'industry' and 'drawing' appear in data.columns but are not
# plotted here — confirm that omission is intentional.
summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career', 'group_work',
    'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
    'alternate_degree', 'expensive_equipment', 'essay', 'architecture',
    'automotive', 'business', 'construction', 'health', 'environment',
    'manufacturing', 'technology',
]

# The figure size is set once here; passing figsize to every pandas plot call
# together with ax= only re-applies the same size to the shared figure.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2, figsize=(14, 8))
fig.suptitle(program_name)

for row_index, column in enumerate(summary_columns):
    answers = program_rows[column]
    answers.value_counts().plot(kind='bar',
                                title=column + "Frequency",
                                ax=axes[row_index][0])
    (answers.value_counts(normalize=True) * 100).plot(kind='bar',
                                                      title=column + "Percent",
                                                      ax=axes[row_index][1])

# NOTE(review): `top` is a fraction of the figure height, so 10 stretches the
# subplot grid far above the 14x8 figure; presumably the notebook backend's
# tight bounding box is what makes the whole grid visible — confirm.
top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
Summary of the Program: env
In [59]:
# Per-program summary for "arch-e": for every survey column listed below, draw
# the raw answer counts (left column) beside the answer shares in percent
# (right column), one subplot row per survey column.
program_name = 'arch-e'
data = get_clean_data('quiz_data.csv',True)
print("Summary of the Program: " + program_name)

# Filter into a separate name so the notebook-wide `data` frame keeps every
# program; no second reload of the CSV is needed at the end of the cell.
program_data = data[data.program == program_name]

# Survey columns to summarise, in display order.
summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career', 'group_work',
    'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
    'alternate_degree', 'expensive_equipment', 'essay', 'architecture',
    'automotive', 'business', 'construction', 'health', 'environment',
    'manufacturing', 'technology',
]

# The figure size is set once here instead of being re-applied by every
# individual plot call.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2, figsize=(14,8))
fig.suptitle(program_name)

for row, column in enumerate(summary_columns):
    answer_counts = program_data[column].value_counts()
    answer_percents = program_data[column].value_counts(normalize=True) * 100
    answer_counts.plot(kind='bar', title=column + "Frequency", ax=axes[row][0])
    answer_percents.plot(kind='bar', title=column + "Percent", ax=axes[row][1])

# `top` is expressed as a fraction of the figure height, so 10 stretches the
# subplot grid far beyond the nominal 14x8 canvas.
# NOTE(review): presumably relies on the inline backend cropping to the tight
# bounding box so that all rows are shown — confirm.
top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
Summary of the Program: arch-e
In [34]:
# Per-program summary for "arch": for every survey column listed below, draw
# the raw answer counts (left column) beside the answer shares in percent
# (right column), one subplot row per survey column.
program_name = 'arch'
data = get_clean_data('quiz_data.csv',True)
print("Summary of the Program: " + program_name)

# Filter into a separate name so the notebook-wide `data` frame keeps every
# program; no second reload of the CSV is needed at the end of the cell.
program_data = data[data.program == program_name]

# Survey columns to summarise, in display order.
summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career', 'group_work',
    'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
    'alternate_degree', 'expensive_equipment', 'essay', 'architecture',
    'automotive', 'business', 'construction', 'health', 'environment',
    'manufacturing', 'technology',
]

# The figure size is set once here instead of being re-applied by every
# individual plot call.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2, figsize=(14,8))
fig.suptitle(program_name)

for row, column in enumerate(summary_columns):
    answer_counts = program_data[column].value_counts()
    answer_percents = program_data[column].value_counts(normalize=True) * 100
    answer_counts.plot(kind='bar', title=column + "Frequency", ax=axes[row][0])
    answer_percents.plot(kind='bar', title=column + "Percent", ax=axes[row][1])

# `top` is expressed as a fraction of the figure height, so 10 stretches the
# subplot grid far beyond the nominal 14x8 canvas.
# NOTE(review): presumably relies on the inline backend cropping to the tight
# bounding box so that all rows are shown — confirm.
top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
Summary of the Program: arch

Bar Plots Mapping Programs Against Each Variable

In [61]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "happy" value.
print("program vs. happy")
plot_data = normalize_1_variables(data, "program", "happy")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="happy",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. happy
In [62]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "problem_type" value.
print("program vs. problem_type")
plot_data = normalize_1_variables(data, "program", "problem_type")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="problem_type",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. problem_type
In [63]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "creative" value.
print("program vs. creative")
plot_data = normalize_1_variables(data, "program", "creative")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="creative",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. creative
In [64]:
# NOTE(review): this whole cell is disabled. Presumably the raw "industry"
# column is skipped in favour of the per-industry columns (architecture,
# automotive, business, ...) that get their own plots — TODO confirm, and
# delete the cell if it is no longer needed.
# print("program vs. industry")
# plot_data = normalize_1_variables(data,"program","industry")
# g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="industry",
#             data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
# g.set_xlabels('')
# g.set_ylabels('percent')

# for ax in g.axes:
#     plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
# plt.subplots_adjust(hspace=0.3)
# plt.show()
In [65]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "outdoors" value.
print("program vs. outdoors")
plot_data = normalize_1_variables(data, "program", "outdoors")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="outdoors",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. outdoors
In [66]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "career" value.
print("program vs. career")
plot_data = normalize_1_variables(data, "program", "career")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="career",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. career
In [67]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "group_work" value.
print("program vs. group_work")
plot_data = normalize_1_variables(data, "program", "group_work")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="group_work",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. group_work
In [68]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "liked_courses" value.
print("program vs. liked_courses")
plot_data = normalize_1_variables(data, "program", "liked_courses")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="liked_courses",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. liked_courses
In [69]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "disliked_courses" value.
print("program vs. disliked_courses")
plot_data = normalize_1_variables(data, "program", "disliked_courses")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="disliked_courses",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. disliked_courses
In [70]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "programming" value.
print("program vs. programming")
plot_data = normalize_1_variables(data, "program", "programming")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="programming",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. programming
In [71]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "join_clubs" value.
print("program vs. join_clubs")
plot_data = normalize_1_variables(data, "program", "join_clubs")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="join_clubs",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. join_clubs
In [72]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "not_clubs" value.
print("program vs. not_clubs")
plot_data = normalize_1_variables(data, "program", "not_clubs")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="not_clubs",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. not_clubs
In [73]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "liked_projects" value.
print("program vs. liked_projects")
plot_data = normalize_1_variables(data, "program", "liked_projects")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="liked_projects",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. liked_projects
In [74]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "disliked_projects" value.
print("program vs. disliked_projects")
plot_data = normalize_1_variables(data, "program", "disliked_projects")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="disliked_projects",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. disliked_projects
In [75]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "tv_shows" value.
print("program vs. tv_shows")
plot_data = normalize_1_variables(data, "program", "tv_shows")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="tv_shows",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. tv_shows
In [76]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "alternate_degree" value.
print("program vs. alternate_degree")
plot_data = normalize_1_variables(data, "program", "alternate_degree")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="alternate_degree",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. alternate_degree
In [77]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "expensive_equipment" value.
print("program vs. expensive_equipment")
plot_data = normalize_1_variables(data, "program", "expensive_equipment")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="expensive_equipment",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. expensive_equipment
In [78]:
# Faceted bar chart of the "percent" values returned by normalize_1_variables:
# programs on the x axis, one vertically stacked facet per "drawing" value.
print("program vs. drawing")
plot_data = normalize_1_variables(data, "program", "drawing")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="drawing",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_axis_labels('', 'percent')

# Show the program tick labels on every facet, rotated 45 degrees.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. drawing
In [79]:
# Bar charts of the "percent" column per program, one facet (stacked
# vertically via col_wrap=1) for each distinct "essay" value.
print("program vs. essay")
plot_data = normalize_1_variables(data, "program", "essay")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="essay",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_xlabels('').set_ylabels('percent')

# Make the rotated program tick labels visible on every facet (presumably
# they are hidden on inner facets because the x axis is shared -- confirm).
# NOTE(review): label='big' sets each tick Text's `label` property; if larger
# text was intended, a font-size keyword was probably meant -- confirm.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. essay
In [35]:
# Bar charts of the "percent" column per program, one facet (stacked
# vertically via col_wrap=1) for each distinct "architecture" value.
print("program vs. architecture")
plot_data = normalize_1_variables(data, "program", "architecture")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="architecture",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_xlabels('').set_ylabels('percent')

# Make the rotated program tick labels visible on every facet (presumably
# they are hidden on inner facets because the x axis is shared -- confirm).
# NOTE(review): label='big' sets each tick Text's `label` property; if larger
# text was intended, a font-size keyword was probably meant -- confirm.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. architecture
In [36]:
# Bar charts of the "percent" column per program, one facet (stacked
# vertically via col_wrap=1) for each distinct "automotive" value.
print("program vs. automotive")
plot_data = normalize_1_variables(data, "program", "automotive")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="automotive",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_xlabels('').set_ylabels('percent')

# Make the rotated program tick labels visible on every facet (presumably
# they are hidden on inner facets because the x axis is shared -- confirm).
# NOTE(review): label='big' sets each tick Text's `label` property; if larger
# text was intended, a font-size keyword was probably meant -- confirm.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. automotive
In [37]:
# Bar charts of the "percent" column per program, one facet (stacked
# vertically via col_wrap=1) for each distinct "business" value.
print("program vs. business")
plot_data = normalize_1_variables(data, "program", "business")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="business",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_xlabels('').set_ylabels('percent')

# Make the rotated program tick labels visible on every facet (presumably
# they are hidden on inner facets because the x axis is shared -- confirm).
# NOTE(review): label='big' sets each tick Text's `label` property; if larger
# text was intended, a font-size keyword was probably meant -- confirm.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. business
In [38]:
# Bar charts of the "percent" column per program, one facet (stacked
# vertically via col_wrap=1) for each distinct "construction" value.
print("program vs. construction")
plot_data = normalize_1_variables(data, "program", "construction")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="construction",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_xlabels('').set_ylabels('percent')

# Make the rotated program tick labels visible on every facet (presumably
# they are hidden on inner facets because the x axis is shared -- confirm).
# NOTE(review): label='big' sets each tick Text's `label` property; if larger
# text was intended, a font-size keyword was probably meant -- confirm.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. construction
In [39]:
# Bar charts of the "percent" column per program, one facet (stacked
# vertically via col_wrap=1) for each distinct "health" value.
print("program vs. health")
plot_data = normalize_1_variables(data, "program", "health")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="health",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_xlabels('').set_ylabels('percent')

# Make the rotated program tick labels visible on every facet (presumably
# they are hidden on inner facets because the x axis is shared -- confirm).
# NOTE(review): label='big' sets each tick Text's `label` property; if larger
# text was intended, a font-size keyword was probably meant -- confirm.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. health
In [40]:
# Bar charts of the "percent" column per program, one facet (stacked
# vertically via col_wrap=1) for each distinct "environment" value.
print("program vs. environment")
plot_data = normalize_1_variables(data, "program", "environment")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="environment",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_xlabels('').set_ylabels('percent')

# Make the rotated program tick labels visible on every facet (presumably
# they are hidden on inner facets because the x axis is shared -- confirm).
# NOTE(review): label='big' sets each tick Text's `label` property; if larger
# text was intended, a font-size keyword was probably meant -- confirm.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. environment
In [41]:
# Bar charts of the "percent" column per program, one facet (stacked
# vertically via col_wrap=1) for each distinct "manufacturing" value.
print("program vs. manufacturing")
plot_data = normalize_1_variables(data, "program", "manufacturing")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="manufacturing",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_xlabels('').set_ylabels('percent')

# Make the rotated program tick labels visible on every facet (presumably
# they are hidden on inner facets because the x axis is shared -- confirm).
# NOTE(review): label='big' sets each tick Text's `label` property; if larger
# text was intended, a font-size keyword was probably meant -- confirm.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. manufacturing
In [42]:
# Bar charts of the "percent" column per program, one facet (stacked
# vertically via col_wrap=1) for each distinct "technology" value.
print("program vs. technology")
plot_data = normalize_1_variables(data, "program", "technology")
g = sns.catplot(
    data=plot_data,
    x="program",
    y="percent",
    col="technology",
    col_wrap=1,
    order=data.program.unique(),
    kind='bar',
    height=10,
    aspect=2.5,
    margin_titles=True,
)
g.set_xlabels('').set_ylabels('percent')

# Make the rotated program tick labels visible on every facet (presumably
# they are hidden on inner facets because the x axis is shared -- confirm).
# NOTE(review): label='big' sets each tick Text's `label` property; if larger
# text was intended, a font-size keyword was probably meant -- confirm.
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. technology

TODO (delete later): the cells below are a playground for testing ideas with the golf data.

In [80]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from data_load import get_encoded_data
import json
import pandas as pd
import numpy as np
from sklearn import preprocessing
import pickle

# Read the golf CSV with every column kept as a string, then discard the
# columns at positions 0 and 6 before previewing the first rows.
directory = 'golf_data.csv'
df = pd.read_csv(directory, dtype=str)
df = df.drop(columns=df.columns[[0, 6]])

print(df.head())
    OUTLOOK TEMPERATURE HUMIDITY  WINDY PLAY
0     Rainy         Hot     High  FALSE   No
1     Rainy         Hot     High   TRUE   No
2  Overcast         Hot     High  FALSE  Yes
3     Sunny        Mild     High  FALSE  Yes
4     Sunny        Cool   Normal  FALSE  Yes
In [81]:
# Hand-written label -> integer code tables, one per feature column. Each
# table also carries the name of the column it applies to under 'column'.
outlook = dict(Rainy=1, Overcast=0, Sunny=2, column='OUTLOOK')
temperature = dict(Hot=1, Mild=2, Cool=0, column='TEMPERATURE')
humidity = dict(High=0, Normal=1, column='HUMIDITY')
windy = dict(FALSE=0, TRUE=1, column='WINDY')
In [82]:
# Label-encode every column of df IN PLACE and collect, per column, a dict
# mapping each original value to its integer code (plus a 'column' entry
# naming the column the dict belongs to).
col_list = list(df.columns)
encoded_dict_list = []
for col in col_list:
    le = preprocessing.LabelEncoder()
    le.fit(list(df[col].unique()))
    df[col] = le.transform(list(df[col]))

    # Codes in order of first appearance, paired with the labels they decode to.
    vals = df[col].unique()
    keys = list(le.inverse_transform(vals))
    cd = dict(zip(keys, vals))
    cd['column'] = col
    encoded_dict_list.append(cd)

print(encoded_dict_list)
print(df.head())
[{'Rainy': 1, 'Overcast': 0, 'Sunny': 2, 'column': 'OUTLOOK'}, {'Hot': 1, 'Mild': 2, 'Cool': 0, 'column': 'TEMPERATURE'}, {'High': 0, 'Normal': 1, 'column': 'HUMIDITY'}, {'FALSE': 0, 'TRUE': 1, 'column': 'WINDY'}, {'No': 0, 'Yes': 1, 'column': 'PLAY'}]
   OUTLOOK  TEMPERATURE  HUMIDITY  WINDY  PLAY
0        1            1         0      0     0
1        1            1         0      1     0
2        0            1         0      0     1
3        2            2         0      0     1
4        2            0         1      0     1
In [83]:
# Split the encoded frame into features (everything except PLAY) and target.
y_df = df["PLAY"]
x_df = df.drop(columns=["PLAY"])

# Array copies of the two frames (np.array copies by default, so these do not
# share memory with df).
X = np.array(x_df)
y = np.array(y_df)

# Fit a multinomial naive Bayes classifier on the DataFrame inputs.
mnb = MultinomialNB()
model = mnb.fit(x_df, y_df)

# Map each feature column name to its positional index in the feature matrix.
cat = df.drop(columns='PLAY')
index_dict = {name: position for position, name in enumerate(cat.columns)}
In [84]:
# Persist the fitted classifier with pickle protocol 2 so it can be reloaded
# later without retraining.
with open('nb_model.pkl', 'wb') as fid:
    pickle.dump(model, fid, protocol=2)

# The feature vector built at prediction time must have exactly the same
# dimension as the training set. To convert user input into that vector we
# also save index_dict (feature column name -> position); it can later be
# used to populate the feature vector for prediction.
with open('cat', 'wb') as fid:
    pickle.dump(index_dict, fid, protocol=2)
In [85]:
# Example query: one raw (un-encoded) value per feature column.
post_dict = dict(
    OUTLOOK='Overcast',
    TEMPERATURE='Cool',
    HUMIDITY='Normal',
    WINDY='FALSE',
)
In [86]:
# Encode the query in post_dict into a fresh 4-element feature vector, in the
# training column order OUTLOOK, TEMPERATURE, HUMIDITY, WINDY.
# A new array is built on purpose: the previous `new_vector = y` aliased the
# training-label array, so filling it in overwrote the first four labels in y.
new_vector = [np.array([
    outlook[post_dict['OUTLOOK']],
    temperature[post_dict['TEMPERATURE']],
    humidity[post_dict['HUMIDITY']],
    windy[post_dict['WINDY']],
])]
print(new_vector)

print("Loading model")
# nb_model.pkl is the file this notebook writes itself; pickle.load must not
# be pointed at untrusted files. The context manager closes the handle, which
# the previous bare open() never did.
with open('nb_model.pkl', 'rb') as pkl_file:
    nb_model = pickle.load(pkl_file)
[array([0, 0, 1, 0])]
Loading model
In [87]:
# Predict the class for the single query row and turn it into a message.
prediction = nb_model.predict(new_vector)

# A predicted class of 0 selects the "do not play" message; anything else
# selects the "play" message.
response_message, rm = (
    ('You should not play golf today', 'NO')
    if prediction == 0
    else ('You could play golf today', 'YES')
)
print(rm)
YES
In [88]:
# Per-class probabilities for the query row. Note this rebinds `prediction`,
# replacing the class label stored by the earlier predict() call.
prediction = nb_model.predict_proba(new_vector)
print(prediction)
# First index is probability of no, second index is probability of yes
[[0.12912819 0.87087181]]
In [89]:
# Log of the per-class probabilities for the same query row (same class order
# as predict_proba). Rebinds `prediction` once more.
prediction = nb_model.predict_log_proba(new_vector)
print(prediction)
[[-2.04694964 -0.13826049]]
In [43]:
# Toggle Code
import ipywidgets as widgets
from IPython.display import display, HTML

# Lookup tables keyed by the toggle state (False = code hidden, True = code
# shown): the jQuery call to run, and the caption to put on the button.
javascript_functions = dict(zip((False, True), ("hide()", "show()")))
button_descriptions = dict(zip((False, True), ("Show code", "Hide code")))


def toggle_code(state):
    """
    Show or hide the notebook's code inputs.

    Looks up the call for ``state`` in ``javascript_functions`` and displays
    an HTML <script> snippet that applies it to every ``div.input`` element.
    NOTE(review): the snippet uses ``$``, so it presumably relies on jQuery
    being available in the page -- confirm for the target front end.
    """
    js_call = javascript_functions[state]
    display(HTML(f"<script>$(\"div.input\").{js_call}</script>"))


def button_action(value):
    """
    Observer callback for the toggle button's "value" trait.

    ``value`` is the change object passed to the observer: its ``new``
    attribute is the updated state and ``owner`` is the button itself. The
    code cells are toggled to match, then the button caption is refreshed.
    """
    new_state = value.new
    toggle_code(new_state)
    value.owner.description = button_descriptions[new_state]


# Start with the code hidden, then render a toggle button that flips it.
state = False
toggle_code(state)

button = widgets.ToggleButton(value=state, description=button_descriptions[state])
button.observe(button_action, names="value")

display(button)
In [ ]:
 
In [ ]: